git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/suites/rbd: rbd-mirror fsx stress test 21697/head
author Jason Dillaman <dillaman@redhat.com>
Tue, 24 Apr 2018 13:00:18 +0000 (09:00 -0400)
committer Jason Dillaman <dillaman@redhat.com>
Fri, 27 Apr 2018 12:34:42 +0000 (08:34 -0400)
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
13 files changed:
qa/suites/rbd/mirror-thrash/cluster/2-node.yaml
qa/suites/rbd/mirror-thrash/users/mirror.yaml
qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml [new file with mode: 0644]
qa/suites/rbd/mirror/cluster [new symlink]
qa/suites/rbd/mirror/cluster/2-node.yaml [deleted file]
qa/tasks/rbd_fsx.py
qa/tasks/rbd_mirror_thrash.py
qa/workunits/rbd/rbd_mirror.sh
qa/workunits/rbd/rbd_mirror_fsx_compare.sh [new file with mode: 0755]
qa/workunits/rbd/rbd_mirror_fsx_prepare.sh [new file with mode: 0755]
qa/workunits/rbd/rbd_mirror_ha.sh
qa/workunits/rbd/rbd_mirror_helpers.sh
qa/workunits/rbd/rbd_mirror_stress.sh

index 700c029dfafc221ba325092cbe910d36771993b6..74f9fb3c466f36014ed408c6fcc939fe4e9e9e17 100644 (file)
@@ -18,8 +18,14 @@ roles:
   - cluster1.client.mirror.1
   - cluster1.client.mirror.2
   - cluster1.client.mirror.3
+  - cluster1.client.mirror.4
+  - cluster1.client.mirror.5
+  - cluster1.client.mirror.6
   - cluster2.client.mirror
   - cluster2.client.mirror.0
   - cluster2.client.mirror.1
   - cluster2.client.mirror.2
   - cluster2.client.mirror.3
+  - cluster2.client.mirror.4
+  - cluster2.client.mirror.5
+  - cluster2.client.mirror.6
index 491512247974659d8ab516ee607d08cef6280976..9466fa1f7d397ef908f84edc4d9283b55418f2c8 100644 (file)
@@ -3,6 +3,9 @@ meta:
 overrides:
   ceph:
     conf:
+      client:
+        rbd default features: 125
+        debug rbd_mirror: 15
       # override to make these names predictable
       client.mirror.0:
         admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok
diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml
new file mode 100644 (file)
index 0000000..6b7dd40
--- /dev/null
@@ -0,0 +1,32 @@
+meta:
+- desc: run multiple FSX workloads to simulate cluster load and then verify
+        that the images were replicated
+tasks:
+- workunit:
+    clients:
+      cluster1.client.mirror: [rbd/rbd_mirror_fsx_prepare.sh]
+    env:
+      # override workunit setting of CEPH_ARGS='--cluster'
+      CEPH_ARGS: ''
+      RBD_MIRROR_NOCLEANUP: '1'
+      RBD_MIRROR_USE_EXISTING_CLUSTER: '1'
+      RBD_MIRROR_USE_RBD_MIRROR: '1'
+- rbd_fsx:
+    clients:
+      - cluster1.client.mirror.0
+      - cluster1.client.mirror.1
+      - cluster1.client.mirror.2
+      - cluster1.client.mirror.3
+      - cluster1.client.mirror.4
+      - cluster1.client.mirror.5
+    ops: 20000
+    keep_images: true
+    pool_name: mirror
+- workunit:
+    clients:
+      cluster1.client.mirror: [rbd/rbd_mirror_fsx_compare.sh]
+    env:
+      # override workunit setting of CEPH_ARGS='--cluster'
+      CEPH_ARGS: ''
+      RBD_MIRROR_USE_EXISTING_CLUSTER: '1'
+      RBD_MIRROR_USE_RBD_MIRROR: '1'
diff --git a/qa/suites/rbd/mirror/cluster b/qa/suites/rbd/mirror/cluster
new file mode 120000 (symlink)
index 0000000..3fc87a1
--- /dev/null
@@ -0,0 +1 @@
+../mirror-thrash/cluster
\ No newline at end of file
diff --git a/qa/suites/rbd/mirror/cluster/2-node.yaml b/qa/suites/rbd/mirror/cluster/2-node.yaml
deleted file mode 100644 (file)
index 74f9fb3..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-meta:
-- desc: 2 ceph clusters with 1 mon and 3 osds each
-roles:
-- - cluster1.mon.a
-  - cluster1.mgr.x
-  - cluster2.mgr.x
-  - cluster1.osd.0
-  - cluster1.osd.1
-  - cluster1.osd.2
-  - cluster1.client.0
-  - cluster2.client.0
-- - cluster2.mon.a
-  - cluster2.osd.0
-  - cluster2.osd.1
-  - cluster2.osd.2
-  - cluster1.client.mirror
-  - cluster1.client.mirror.0
-  - cluster1.client.mirror.1
-  - cluster1.client.mirror.2
-  - cluster1.client.mirror.3
-  - cluster1.client.mirror.4
-  - cluster1.client.mirror.5
-  - cluster1.client.mirror.6
-  - cluster2.client.mirror
-  - cluster2.client.mirror.0
-  - cluster2.client.mirror.1
-  - cluster2.client.mirror.2
-  - cluster2.client.mirror.3
-  - cluster2.client.mirror.4
-  - cluster2.client.mirror.5
-  - cluster2.client.mirror.6
index 7dae4478a465594b33837100ee8920ce60bf8199..d32475ecd03c9c710476b20d54f72aa9c388212f 100644 (file)
@@ -4,6 +4,7 @@ Run fsx on an rbd image
 import contextlib
 import logging
 
+from teuthology.orchestra import run
 from teuthology.parallel import parallel
 from teuthology import misc as teuthology
 
@@ -68,8 +69,15 @@ def _run_one_client(ctx, config, role):
             config.get('valgrind')
         )
 
+    cluster_name, type_, client_id = teuthology.split_role(role)
+    if type_ != 'client':
+        msg = 'client role ({0}) must be a client'.format(role)
+        raise ConfigError(msg)
+
     args.extend([
         'ceph_test_librbd_fsx',
+        '--cluster', cluster_name,
+        '--id', client_id,
         '-d', # debug output for all operations
         '-W', '-R', # mmap doesn't work with rbd
         '-p', str(config.get('progress_interval', 100)), # show progress
@@ -96,8 +104,10 @@ def _run_one_client(ctx, config, role):
         args.append('-g') # -g deep copy instead of clone
     if config.get('journal_replay', False):
         args.append('-j') # -j replay all IO events from journal
+    if config.get('keep_images', False):
+        args.append('-k') # -k keep images on success
     args.extend([
-        'pool_{pool}'.format(pool=role),
+        config.get('pool_name', 'pool_{pool}'.format(pool=role)),
         'image_{image}'.format(image=role),
     ])
 
index 5c92f50714a8d91929bc321c8abfa297ee2ee387..6e35c3140147d5e1ad1057ba515176b188e831c2 100644 (file)
@@ -34,7 +34,10 @@ class RBDMirrorThrasher(Greenlet):
     max_thrash: [default: 1] the maximum number of active rbd-mirror daemons per
       cluster will be thrashed at any given time.
 
-    max_thrash_delay: [default: 30] maximum number of seconds to delay before
+    min_thrash_delay: [default: 60] minimum number of seconds to delay before
+      thrashing again.
+
+    max_thrash_delay: [default: 120] maximum number of seconds to delay before
       thrashing again.
 
     max_revive_delay: [default: 10] maximum number of seconds to delay before
@@ -71,7 +74,8 @@ class RBDMirrorThrasher(Greenlet):
 
         self.randomize = bool(self.config.get('randomize', True))
         self.max_thrash = int(self.config.get('max_thrash', 1))
-        self.max_thrash_delay = float(self.config.get('thrash_delay', 60.0))
+        self.min_thrash_delay = float(self.config.get('min_thrash_delay', 60.0))
+        self.max_thrash_delay = float(self.config.get('max_thrash_delay', 120.0))
         self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
 
     def _run(self):
@@ -101,7 +105,7 @@ class RBDMirrorThrasher(Greenlet):
         while not self.stopping.is_set():
             delay = self.max_thrash_delay
             if self.randomize:
-                delay = random.randrange(0.0, self.max_thrash_delay)
+                delay = random.randrange(self.min_thrash_delay, self.max_thrash_delay)
 
             if delay > 0.0:
                 self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
@@ -114,14 +118,10 @@ class RBDMirrorThrasher(Greenlet):
             weight = 1.0 / len(self.daemons)
             count = 0
             for daemon in self.daemons:
-                # if we've reached max_thrash, we're done
-                count = count + 1
-                if count > self.max_thrash:
-                    break
-
-                skip = random.randrange(0.0, 1.0)
+                skip = random.uniform(0.0, 1.0)
                 if weight <= skip:
-                    self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=weight))
+                    self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
+                        label=daemon.id_, skip=skip, weight=weight))
                     continue
 
                 self.log('kill {label}'.format(label=daemon.id_))
@@ -129,6 +129,11 @@ class RBDMirrorThrasher(Greenlet):
                 killed_daemons.append(daemon)
                 stats['kill'] += 1
 
+                # if we've reached max_thrash, we're done
+                count += 1
+                if count >= self.max_thrash:
+                    break
+
             if killed_daemons:
                 # wait for a while before restarting
 
index ee8044ef239a2709948c66224e3cec87f689f7cb..655453f64cff49973c50b4f9b2e7c002c1ea80bc 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -ex
 #
 # rbd_mirror.sh - test rbd-mirror daemon
 #
@@ -9,6 +9,8 @@
 
 . $(dirname $0)/rbd_mirror_helpers.sh
 
+setup
+
 testlog "TEST: add image and test replay"
 start_mirrors ${CLUSTER1}
 image=test
@@ -436,5 +438,3 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blacklist ls 2>&1 | grep -q "listed 0 entries"
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blacklist ls 2>&1 | grep -q "listed 0 entries"
 fi
-
-echo OK
diff --git a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
new file mode 100755 (executable)
index 0000000..71d9a44
--- /dev/null
@@ -0,0 +1,28 @@
+#!/bin/sh -ex
+#
+# rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload
+#
+# The script is used to compare FSX-generated images between two clusters.
+#
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+# cleanup() takes the script's exit status as its first argument
+trap 'cleanup $?' INT TERM EXIT
+setup_tempdir
+
+testlog "TEST: snapshot all pool images"
+snap_id=$(uuidgen)
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+    create_snapshot ${CLUSTER1} ${POOL} ${image} ${snap_id}
+done
+
+testlog "TEST: wait for snapshots"
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+    wait_for_snap_present ${CLUSTER2} ${POOL} ${image} ${snap_id}
+done
+
+testlog "TEST: compare image snapshots"
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+    compare_image_snapshots ${POOL} ${image}
+done
diff --git a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
new file mode 100755 (executable)
index 0000000..d988987
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/sh -ex
+#
+# rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload
+#
+# The script is used to run the shared setup step before the FSX workload.
+#
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+setup
index fc08c1dbe601766111b300eab250303c59266ab8..9ee0cb98f72fa073e52c971361ce6db5f60d44a0 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -ex
 #
 # rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode
 #
@@ -7,6 +7,8 @@ RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-7}
 
 . $(dirname $0)/rbd_mirror_helpers.sh
 
+setup
+
 is_leader()
 {
     local instance=$1
@@ -205,5 +207,3 @@ for i in 0 1 2 3 4 5; do
 done
 
 stop_mirror ${CLUSTER1}:${LEADER}
-
-echo OK
index a81d8989c05e35f65037c20586933b0fbdee7b72..5a58bae11189132ab511d8b380a44c82515e64c2 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh -x
+#!/bin/sh
 #
 # rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions
 #
@@ -245,11 +245,8 @@ setup_pools()
     rbd --cluster ${cluster} mirror pool peer add ${PARENT_POOL} ${remote_cluster}
 }
 
-setup()
+setup_tempdir()
 {
-    local c
-    trap cleanup INT TERM EXIT
-
     if [ -n "${RBD_MIRROR_TEMDIR}" ]; then
        test -d "${RBD_MIRROR_TEMDIR}" ||
        mkdir "${RBD_MIRROR_TEMDIR}"
@@ -258,7 +255,14 @@ setup()
     else
        TEMPDIR=`mktemp -d`
     fi
+}
+
+setup()
+{
+    local c
+    trap 'cleanup $?' INT TERM EXIT
 
+    setup_tempdir
     if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
        setup_cluster "${CLUSTER1}"
        setup_cluster "${CLUSTER2}"
@@ -273,27 +277,41 @@ setup()
 
 cleanup()
 {
-    test  -n "${RBD_MIRROR_NOCLEANUP}" && return
-    local cluster instance
+    local error_code=$1
 
     set +e
 
-    for cluster in "${CLUSTER1}" "${CLUSTER2}"; do
-       stop_mirrors "${cluster}"
-    done
+    if [ "${error_code}" -ne 0 ]; then
+        status
+    fi
 
-    if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
-        cd ${CEPH_ROOT}
-        CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER1}
-        CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER2}
+    if [ -z "${RBD_MIRROR_NOCLEANUP}" ]; then
+        local cluster instance
+
+        for cluster in "${CLUSTER1}" "${CLUSTER2}"; do
+           stop_mirrors "${cluster}"
+        done
+
+        if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
+            cd ${CEPH_ROOT}
+            CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER1}
+            CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER2}
+        else
+            CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
+            CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
+            CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
+            CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
+        fi
+        test "${RBD_MIRROR_TEMDIR}" = "${TEMPDIR}" || rm -Rf ${TEMPDIR}
+    fi
+
+    if [ "${error_code}" -eq 0 ]; then
+        echo "OK"
     else
-        CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
-        CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
-        CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
-        CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
+        echo "FAIL"
     fi
-    test "${RBD_MIRROR_TEMDIR}" = "${TEMPDIR}" ||
-    rm -Rf ${TEMPDIR}
+
+    exit ${error_code}
 }
 
 start_mirror()
@@ -507,10 +525,11 @@ test_image_replay_state()
     local pool=$2
     local image=$3
     local test_state=$4
+    local status_result
     local current_state=stopped
 
-    admin_daemons "${cluster}" rbd mirror status ${pool}/${image} |
-       grep -i 'state.*Replaying' && current_state=started
+    status_result=$(admin_daemons "${cluster}" rbd mirror status ${pool}/${image} | grep -i 'state') || return 1
+    echo "${status_result}" | grep -i 'Replaying' && current_state=started
     test "${test_state}" = "${current_state}"
 }
 
@@ -858,6 +877,23 @@ compare_images()
     rm -f ${rmt_export} ${loc_export}
 }
 
+# Export each snapshot of pool/image from both clusters and cmp the exports.
+compare_image_snapshots()
+{
+    local pool=$1 image=$2 snap_name
+    local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export
+    local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export
+
+    # NOTE(review): assumes plain "snap list" has a header row, name in col 2
+    for snap_name in $(rbd --cluster ${CLUSTER1} -p ${pool} snap list ${image} | awk 'NR > 1 { print $2 }'); do
+        rm -f ${rmt_export} ${loc_export}
+        rbd --cluster ${CLUSTER2} -p ${pool} export ${image}@${snap_name} ${rmt_export}
+        rbd --cluster ${CLUSTER1} -p ${pool} export ${image}@${snap_name} ${loc_export}
+        cmp ${rmt_export} ${loc_export}
+    done
+    rm -f ${rmt_export} ${loc_export}
+}
+
 demote_image()
 {
     local cluster=$1
@@ -994,7 +1030,3 @@ then
     $@
     exit $?
 fi
-
-set -xe
-
-setup
index f21984eeee448deed9f3cbb1732c344c11024b4b..a1d7d03470a61c858a22af319419a31ab3cfe6e1 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -ex
 #
 # rbd_mirror_stress.sh - stress test rbd-mirror daemon
 #
@@ -13,6 +13,8 @@ export LOCKDEP=0
 
 . $(dirname $0)/rbd_mirror_helpers.sh
 
+setup
+
 create_snap()
 {
     local cluster=$1
@@ -182,5 +184,3 @@ do
   purge_snapshots ${CLUSTER2} ${POOL} ${image}
   remove_image_retry ${CLUSTER2} ${POOL} ${image}
 done
-
-echo OK