- cluster1.client.mirror.1
- cluster1.client.mirror.2
- cluster1.client.mirror.3
+ - cluster1.client.mirror.4
+ - cluster1.client.mirror.5
+ - cluster1.client.mirror.6
- cluster2.client.mirror
- cluster2.client.mirror.0
- cluster2.client.mirror.1
- cluster2.client.mirror.2
- cluster2.client.mirror.3
+ - cluster2.client.mirror.4
+ - cluster2.client.mirror.5
+ - cluster2.client.mirror.6
overrides:
ceph:
conf:
+ client:
+ rbd default features: 125
+ debug rbd_mirror: 15
# override to make these names predictable
client.mirror.0:
admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok
--- /dev/null
+meta:
+- desc: run multiple FSX workloads to simulate cluster load and then verify
+ that the images were replicated
+tasks:
+- workunit:
+ clients:
+ cluster1.client.mirror: [rbd/rbd_mirror_fsx_prepare.sh]
+ env:
+ # override workunit setting of CEPH_ARGS='--cluster'
+ CEPH_ARGS: ''
+ RBD_MIRROR_NOCLEANUP: '1'
+ RBD_MIRROR_USE_EXISTING_CLUSTER: '1'
+ RBD_MIRROR_USE_RBD_MIRROR: '1'
+- rbd_fsx:
+ clients:
+ - cluster1.client.mirror.0
+ - cluster1.client.mirror.1
+ - cluster1.client.mirror.2
+ - cluster1.client.mirror.3
+ - cluster1.client.mirror.4
+ - cluster1.client.mirror.5
+ ops: 20000
+ keep_images: true
+ pool_name: mirror
+- workunit:
+ clients:
+ cluster1.client.mirror: [rbd/rbd_mirror_fsx_compare.sh]
+ env:
+ # override workunit setting of CEPH_ARGS='--cluster'
+ CEPH_ARGS: ''
+ RBD_MIRROR_USE_EXISTING_CLUSTER: '1'
+ RBD_MIRROR_USE_RBD_MIRROR: '1'
--- /dev/null
+../mirror-thrash/cluster
\ No newline at end of file
+++ /dev/null
-meta:
-- desc: 2 ceph clusters with 1 mon and 3 osds each
-roles:
-- - cluster1.mon.a
- - cluster1.mgr.x
- - cluster2.mgr.x
- - cluster1.osd.0
- - cluster1.osd.1
- - cluster1.osd.2
- - cluster1.client.0
- - cluster2.client.0
-- - cluster2.mon.a
- - cluster2.osd.0
- - cluster2.osd.1
- - cluster2.osd.2
- - cluster1.client.mirror
- - cluster1.client.mirror.0
- - cluster1.client.mirror.1
- - cluster1.client.mirror.2
- - cluster1.client.mirror.3
- - cluster1.client.mirror.4
- - cluster1.client.mirror.5
- - cluster1.client.mirror.6
- - cluster2.client.mirror
- - cluster2.client.mirror.0
- - cluster2.client.mirror.1
- - cluster2.client.mirror.2
- - cluster2.client.mirror.3
- - cluster2.client.mirror.4
- - cluster2.client.mirror.5
- - cluster2.client.mirror.6
import contextlib
import logging
+from teuthology.orchestra import run
from teuthology.parallel import parallel
from teuthology import misc as teuthology
config.get('valgrind')
)
+ cluster_name, type_, client_id = teuthology.split_role(role)
+ if type_ != 'client':
+ msg = 'client role ({0}) must be a client'.format(role)
+ raise ConfigError(msg)
+
args.extend([
'ceph_test_librbd_fsx',
+ '--cluster', cluster_name,
+ '--id', client_id,
'-d', # debug output for all operations
'-W', '-R', # mmap doesn't work with rbd
'-p', str(config.get('progress_interval', 100)), # show progress
args.append('-g') # -g deep copy instead of clone
if config.get('journal_replay', False):
args.append('-j') # -j replay all IO events from journal
+ if config.get('keep_images', False):
+ args.append('-k') # -k keep images on success
args.extend([
- 'pool_{pool}'.format(pool=role),
+ config.get('pool_name', 'pool_{pool}'.format(pool=role)),
'image_{image}'.format(image=role),
])
max_thrash: [default: 1] the maximum number of active rbd-mirror daemons per
cluster will be thrashed at any given time.
- max_thrash_delay: [default: 30] maximum number of seconds to delay before
+ min_thrash_delay: [default: 60] minimum number of seconds to delay before
+ thrashing again.
+
+ max_thrash_delay: [default: 120] maximum number of seconds to delay before
thrashing again.
max_revive_delay: [default: 10] maximum number of seconds to delay before
self.randomize = bool(self.config.get('randomize', True))
self.max_thrash = int(self.config.get('max_thrash', 1))
- self.max_thrash_delay = float(self.config.get('thrash_delay', 60.0))
+ self.min_thrash_delay = float(self.config.get('min_thrash_delay', 60.0))
+ self.max_thrash_delay = float(self.config.get('max_thrash_delay', 120.0))
self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))
def _run(self):
while not self.stopping.is_set():
delay = self.max_thrash_delay
if self.randomize:
- delay = random.randrange(0.0, self.max_thrash_delay)
+ delay = random.randrange(self.min_thrash_delay, self.max_thrash_delay)
if delay > 0.0:
self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
weight = 1.0 / len(self.daemons)
count = 0
for daemon in self.daemons:
- # if we've reached max_thrash, we're done
- count = count + 1
- if count > self.max_thrash:
- break
-
- skip = random.randrange(0.0, 1.0)
+ skip = random.uniform(0.0, 1.0)
if weight <= skip:
- self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=weight))
+ self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
+ label=daemon.id_, skip=skip, weight=weight))
continue
self.log('kill {label}'.format(label=daemon.id_))
killed_daemons.append(daemon)
stats['kill'] += 1
+ # if we've reached max_thrash, we're done
+ count += 1
+ if count >= self.max_thrash:
+ break
+
if killed_daemons:
# wait for a while before restarting
-#!/bin/sh
+#!/bin/sh -ex
#
# rbd_mirror.sh - test rbd-mirror daemon
#
. $(dirname $0)/rbd_mirror_helpers.sh
+setup
+
testlog "TEST: add image and test replay"
start_mirrors ${CLUSTER1}
image=test
CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blacklist ls 2>&1 | grep -q "listed 0 entries"
CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blacklist ls 2>&1 | grep -q "listed 0 entries"
fi
-
-echo OK
--- /dev/null
+#!/bin/sh -ex
+#
+# rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload
+#
+# The script is used to compare FSX-generated images between two clusters.
+#
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+trap cleanup INT TERM EXIT
+
+setup_tempdir
+
+testlog "TEST: snapshot all pool images"
+snap_id=`uuidgen`
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+ create_snapshot ${CLUSTER1} ${POOL} ${image} ${snap_id}
+done
+
+testlog "TEST: wait for snapshots"
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+ wait_for_snap_present ${CLUSTER2} ${POOL} ${image} ${snap_id}
+done
+
+testlog "TEST: compare image snapshots"
+for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do
+ compare_image_snapshots ${POOL} ${image}
+done
--- /dev/null
+#!/bin/sh -ex
+#
+# rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload
+#
+# The script is used to prepare the clusters before running the FSX workload.
+#
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+setup
-#!/bin/sh
+#!/bin/sh -ex
#
# rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode
#
. $(dirname $0)/rbd_mirror_helpers.sh
+setup
+
is_leader()
{
local instance=$1
done
stop_mirror ${CLUSTER1}:${LEADER}
-
-echo OK
-#!/bin/sh -x
+#!/bin/sh
#
# rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions
#
rbd --cluster ${cluster} mirror pool peer add ${PARENT_POOL} ${remote_cluster}
}
-setup()
+setup_tempdir()
{
- local c
- trap cleanup INT TERM EXIT
-
if [ -n "${RBD_MIRROR_TEMDIR}" ]; then
test -d "${RBD_MIRROR_TEMDIR}" ||
mkdir "${RBD_MIRROR_TEMDIR}"
else
TEMPDIR=`mktemp -d`
fi
+}
+
+setup()
+{
+ local c
+ trap 'cleanup $?' INT TERM EXIT
+ setup_tempdir
if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
setup_cluster "${CLUSTER1}"
setup_cluster "${CLUSTER2}"
cleanup()
{
- test -n "${RBD_MIRROR_NOCLEANUP}" && return
- local cluster instance
+ local error_code=$1
set +e
- for cluster in "${CLUSTER1}" "${CLUSTER2}"; do
- stop_mirrors "${cluster}"
- done
+ if [ "${error_code}" -ne 0 ]; then
+ status
+ fi
- if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
- cd ${CEPH_ROOT}
- CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER1}
- CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER2}
+ if [ -z "${RBD_MIRROR_NOCLEANUP}" ]; then
+ local cluster instance
+
+ for cluster in "${CLUSTER1}" "${CLUSTER2}"; do
+ stop_mirrors "${cluster}"
+ done
+
+ if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then
+ cd ${CEPH_ROOT}
+ CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER1}
+ CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER2}
+ else
+ CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
+ CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
+ CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
+ CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
+ fi
+ test "${RBD_MIRROR_TEMDIR}" = "${TEMPDIR}" || rm -Rf ${TEMPDIR}
+ fi
+
+ if [ "${error_code}" -eq 0 ]; then
+ echo "OK"
else
- CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
- CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it
- CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
- CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it
+ echo "FAIL"
fi
- test "${RBD_MIRROR_TEMDIR}" = "${TEMPDIR}" ||
- rm -Rf ${TEMPDIR}
+
+ exit ${error_code}
}
start_mirror()
local pool=$2
local image=$3
local test_state=$4
+ local status_result
local current_state=stopped
- admin_daemons "${cluster}" rbd mirror status ${pool}/${image} |
- grep -i 'state.*Replaying' && current_state=started
+ status_result=$(admin_daemons "${cluster}" rbd mirror status ${pool}/${image} | grep -i 'state') || return 1
+ echo "${status_result}" | grep -i 'Replaying' && current_state=started
test "${test_state}" = "${current_state}"
}
rm -f ${rmt_export} ${loc_export}
}
+compare_image_snapshots()
+{
+ local pool=$1
+ local image=$2
+
+ local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export
+ local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export
+
+ for snap_name in $(rbd --cluster ${CLUSTER1} -p ${pool} snap list ${image}); do
+ rm -f ${rmt_export} ${loc_export}
+ rbd --cluster ${CLUSTER2} -p ${pool} export ${image}@${snap_name} ${rmt_export}
+ rbd --cluster ${CLUSTER1} -p ${pool} export ${image}@${snap_name} ${loc_export}
+ cmp ${rmt_export} ${loc_export}
+ done
+ rm -f ${rmt_export} ${loc_export}
+}
+
demote_image()
{
local cluster=$1
$@
exit $?
fi
-
-set -xe
-
-setup
-#!/bin/sh
+#!/bin/sh -ex
#
# rbd_mirror_stress.sh - stress test rbd-mirror daemon
#
. $(dirname $0)/rbd_mirror_helpers.sh
+setup
+
create_snap()
{
local cluster=$1
purge_snapshots ${CLUSTER2} ${POOL} ${image}
remove_image_retry ${CLUSTER2} ${POOL} ${image}
done
-
-echo OK