From 70c97bd07e6764e1c6ff83225f6a2a9dcdfb989e Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 9 Jun 2016 16:23:52 -0400 Subject: [PATCH] qa/workunits/rbd: additional rbd-mirror stress tests Fixes: http://tracker.ceph.com/issues/16197 Signed-off-by: Jason Dillaman (cherry picked from commit db3e583a5fe4a7985b1e7f1740114da414835af5) --- qa/workunits/rbd/rbd_mirror.sh | 642 +---------------------- qa/workunits/rbd/rbd_mirror_helpers.sh | 683 +++++++++++++++++++++++++ qa/workunits/rbd/rbd_mirror_stress.sh | 467 ++++------------- 3 files changed, 790 insertions(+), 1002 deletions(-) create mode 100755 qa/workunits/rbd/rbd_mirror_helpers.sh diff --git a/qa/workunits/rbd/rbd_mirror.sh b/qa/workunits/rbd/rbd_mirror.sh index ceb431b1c25ed..2408cc8738237 100755 --- a/qa/workunits/rbd/rbd_mirror.sh +++ b/qa/workunits/rbd/rbd_mirror.sh @@ -6,646 +6,14 @@ # creates a temporary directory, used for cluster configs, daemon logs, admin # socket, temporary files, and launches rbd-mirror daemon. # -# There are several env variables useful when troubleshooting a test failure: -# -# RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes, -# destroy the clusters and remove the temp directory) -# on exit, so it is possible to check the test state -# after failure. -# RBD_MIRROR_TEMDIR - use this path when creating the temporary directory -# (should not exist) instead of running mktemp(1). -# -# The cleanup can be done as a separate step, running the script with -# `cleanup ${RBD_MIRROR_TEMDIR}' arguments. -# -# Note, as other workunits tests, rbd_mirror.sh expects to find ceph binaries -# in PATH. -# -# Thus a typical troubleshooting session: -# -# From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with -# TEMPDIR pointing to a known location: -# -# cd $CEPH_SRC_PATH -# PATH=$CEPH_SRC_PATH:$PATH -# RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ -# ../qa/workunits/rbd/rbd_mirror.sh -# -# After the test failure cd to TEMPDIR and check the current state: -# -# cd /tmp/tmp.rbd_mirror -# ls -# less rbd-mirror.cluster1_daemon.$pid.log -# ceph --cluster cluster1 -s -# ceph --cluster cluster1 -s -# rbd --cluster cluster2 -p mirror ls -# rbd --cluster cluster2 -p mirror journal status --image test -# ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help -# ... -# -# Also you can execute commands (functions) from the script: -# -# cd $CEPH_SRC_PATH -# export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror -# ../qa/workunits/rbd/rbd_mirror.sh status -# ../qa/workunits/rbd/rbd_mirror.sh stop_mirror cluster1 -# ../qa/workunits/rbd/rbd_mirror.sh start_mirror cluster2 -# ../qa/workunits/rbd/rbd_mirror.sh flush cluster2 -# ... -# -# Eventually, run the cleanup: -# -# cd $CEPH_SRC_PATH -# RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ -# ../qa/workunits/rbd/rbd_mirror.sh cleanup -# - -CLUSTER1=cluster1 -CLUSTER2=cluster2 -POOL=mirror -PARENT_POOL=mirror_parent -SRC_DIR=$(readlink -f $(dirname $0)/../../../src) -TEMPDIR= - -# These vars facilitate running this script in an environment with -# ceph installed from packages, like teuthology. These are not defined -# by default. -# -# RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters -# RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror -# running as ceph client $CEPH_ID. 
If empty, -# this script will start and stop rbd-mirror - -# -# Functions -# - -daemon_asok_file() -{ - local local_cluster=$1 - local cluster=$2 - - if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - echo $(ceph-conf --cluster $local_cluster --name "client.${CEPH_ID}" 'admin socket') - else - echo "${TEMPDIR}/rbd-mirror.${local_cluster}_daemon.${cluster}.asok" - fi -} - -daemon_pid_file() -{ - local cluster=$1 - - if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - echo $(ceph-conf --cluster $cluster --name "client.${CEPH_ID}" 'pid file') - else - echo "${TEMPDIR}/rbd-mirror.${cluster}_daemon.pid" - fi -} - -testlog() -{ - echo $(date '+%F %T') $@ | tee -a "${TEMPDIR}/rbd-mirror.test.log" -} - -setup() -{ - local c - trap cleanup INT TERM EXIT - - if [ -n "${RBD_MIRROR_TEMDIR}" ]; then - mkdir "${RBD_MIRROR_TEMDIR}" - TEMPDIR="${RBD_MIRROR_TEMDIR}" - else - TEMPDIR=`mktemp -d` - fi - - if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then - cd ${SRC_DIR} - ./mstart.sh ${CLUSTER1} -n - ./mstart.sh ${CLUSTER2} -n - - ln -s $(readlink -f run/${CLUSTER1}/ceph.conf) \ - ${TEMPDIR}/${CLUSTER1}.conf - ln -s $(readlink -f run/${CLUSTER2}/ceph.conf) \ - ${TEMPDIR}/${CLUSTER2}.conf - - cd ${TEMPDIR} - fi - - ceph --cluster ${CLUSTER1} osd pool create ${POOL} 64 64 - ceph --cluster ${CLUSTER1} osd pool create ${PARENT_POOL} 64 64 - ceph --cluster ${CLUSTER2} osd pool create ${PARENT_POOL} 64 64 - ceph --cluster ${CLUSTER2} osd pool create ${POOL} 64 64 - - rbd --cluster ${CLUSTER1} mirror pool enable ${POOL} pool - rbd --cluster ${CLUSTER2} mirror pool enable ${POOL} pool - rbd --cluster ${CLUSTER1} mirror pool enable ${PARENT_POOL} image - rbd --cluster ${CLUSTER2} mirror pool enable ${PARENT_POOL} image - - rbd --cluster ${CLUSTER1} mirror pool peer add ${POOL} ${CLUSTER2} - rbd --cluster ${CLUSTER2} mirror pool peer add ${POOL} ${CLUSTER1} - rbd --cluster ${CLUSTER1} mirror pool peer add ${PARENT_POOL} ${CLUSTER2} - rbd --cluster ${CLUSTER2} mirror pool peer add ${PARENT_POOL} ${CLUSTER1} -} - -cleanup() -{ - test -n "${RBD_MIRROR_NOCLEANUP}" && return - - set +e - - stop_mirror "${CLUSTER1}" - stop_mirror "${CLUSTER2}" - - if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then - cd ${SRC_DIR} - ./mstop.sh ${CLUSTER1} - ./mstop.sh ${CLUSTER2} - else - ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it - ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it - ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it - ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it - fi - rm -Rf ${TEMPDIR} -} - -start_mirror() -{ - local cluster=$1 - - test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return - - rbd-mirror \ - --cluster ${cluster} \ - --pid-file=$(daemon_pid_file "${cluster}") \ - --log-file=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.\$pid.log \ - --admin-socket=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.asok \ - --debug-rbd=30 --debug-journaler=30 \ - --debug-rbd_mirror=30 \ - --daemonize=true -} - -stop_mirror() -{ - local cluster=$1 - - test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return - - local pid - pid=$(cat $(daemon_pid_file "${cluster}") 2>/dev/null) || : - if [ -n "${pid}" ] - then - kill ${pid} - for s in 1 2 4 8 16 32; do - sleep $s - ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' && break - done - ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' - fi - rm -f $(daemon_asok_file "${cluster}" "${CLUSTER1}") - rm -f 
$(daemon_asok_file "${cluster}" "${CLUSTER2}") - rm -f $(daemon_pid_file "${cluster}") -} - -admin_daemon() -{ - local cluster=$1 ; shift - - local asok_file=$(daemon_asok_file "${cluster}" "${cluster}") - test -S "${asok_file}" - - ceph --admin-daemon ${asok_file} $@ -} - -status() -{ - local cluster daemon image_pool image - - for cluster in ${CLUSTER1} ${CLUSTER2} - do - echo "${cluster} status" - ceph --cluster ${cluster} -s - echo - - for image_pool in ${POOL} ${PARENT_POOL} - do - echo "${cluster} ${image_pool} images" - rbd --cluster ${cluster} -p ${image_pool} ls - echo - - echo "${cluster} ${image_pool} mirror pool status" - rbd --cluster ${cluster} -p ${image_pool} mirror pool status --verbose - echo - - for image in `rbd --cluster ${cluster} -p ${image_pool} ls 2>/dev/null` - do - echo "image ${image} info" - rbd --cluster ${cluster} -p ${image_pool} info ${image} - echo - echo "image ${image} journal status" - rbd --cluster ${cluster} -p ${image_pool} journal status --image ${image} - echo - done - done - done - - local ret - - for cluster in "${CLUSTER1}" "${CLUSTER2}" - do - local pid_file=$(daemon_pid_file ${cluster} ) - if [ ! -e ${pid_file} ] - then - echo "${cluster} rbd-mirror not running or unknown" \ - "(${pid_file} not exist)" - continue - fi - - local pid - pid=$(cat ${pid_file} 2>/dev/null) || : - if [ -z "${pid}" ] - then - echo "${cluster} rbd-mirror not running or unknown" \ - "(can't find pid using ${pid_file})" - ret=1 - continue - fi - - echo "${daemon} rbd-mirror process in ps output:" - if ps auxww | - awk -v pid=${pid} 'NR == 1 {print} $2 == pid {print; exit 1}' - then - echo - echo "${cluster} rbd-mirror not running" \ - "(can't find pid $pid in ps output)" - ret=1 - continue - fi - echo - - local asok_file=$(daemon_asok_file ${cluster} ${cluster}) - if [ ! 
-S "${asok_file}" ] - then - echo "${cluster} rbd-mirror asok is unknown (${asok_file} not exits)" - ret=1 - continue - fi - - echo "${cluster} rbd-mirror status" - ceph --admin-daemon ${asok_file} rbd mirror status - echo - done - - return ${ret} -} - -flush() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local cmd="rbd mirror flush" - - if [ -n "${image}" ] - then - cmd="${cmd} ${pool}/${image}" - fi - - admin_daemon "${cluster}" ${cmd} -} - -test_image_replay_state() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local test_state=$4 - local current_state=stopped - admin_daemon "${cluster}" help | - fgrep "\"rbd mirror status ${pool}/${image}\"" && - admin_daemon "${cluster}" rbd mirror status ${pool}/${image} | - grep -i 'state.*Replaying' && - current_state=started - - test "${test_state}" = "${current_state}" -} - -wait_for_image_replay_state() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local state=$4 - local s - - # TODO: add a way to force rbd-mirror to update replayers - for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do - sleep ${s} - test_image_replay_state "${cluster}" "${pool}" "${image}" "${state}" && return 0 - done - return 1 -} - -wait_for_image_replay_started() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - wait_for_image_replay_state "${cluster}" "${pool}" "${image}" started -} - -wait_for_image_replay_stopped() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - wait_for_image_replay_state "${cluster}" "${pool}" "${image}" stopped -} - -get_position() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local id_regexp=$4 - - # Parse line like below, looking for the first position - # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]] - - local status_log=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.status - rbd --cluster ${cluster} -p ${pool} journal status --image ${image} | - tee ${status_log} >&2 - sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*$/\1/p' \ - ${status_log} -} - -get_master_position() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - get_position "${cluster}" "${pool}" "${image}" '' -} - -get_mirror_position() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - get_position "${cluster}" "${pool}" "${image}" '..*' -} - -wait_for_replay_complete() -{ - local local_cluster=$1 - local cluster=$2 - local pool=$3 - local image=$4 - local s master_pos mirror_pos - - for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16; do - sleep ${s} - flush "${local_cluster}" "${pool}" "${image}" - master_pos=$(get_master_position "${cluster}" "${pool}" "${image}") - mirror_pos=$(get_mirror_position "${cluster}" "${pool}" "${image}") - test -n "${master_pos}" -a "${master_pos}" = "${mirror_pos}" && return 0 - done - return 1 -} - -test_status_in_pool_dir() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local state_pattern=$4 - local description_pattern=$5 - - local status_log=${TEMPDIR}/${cluster}-${image}.mirror_status - rbd --cluster ${cluster} -p ${pool} mirror image status ${image} | - tee ${status_log} - grep "state: .*${state_pattern}" ${status_log} - grep "description: .*${description_pattern}" ${status_log} -} - -create_image() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - rbd --cluster ${cluster} -p ${pool} create --size 128 \ - --image-feature layering,exclusive-lock,journaling ${image} -} - -remove_image() -{ - local 
cluster=$1 - local pool=$2 - local image=$3 - - rbd --cluster ${cluster} -p ${pool} rm ${image} -} - -clone_image() -{ - local cluster=$1 - local parent_pool=$2 - local parent_image=$3 - local parent_snap=$4 - local clone_pool=$5 - local clone_image=$6 - - rbd --cluster ${cluster} clone ${parent_pool}/${parent_image}@${parent_snap} \ - ${clone_pool}/${clone_image} --image-feature layering,exclusive-lock,journaling -} - -create_snapshot() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local snap=$4 - - rbd --cluster ${cluster} -p ${pool} snap create ${image}@${snap} -} - -remove_snapshot() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local snap=$4 - - rbd --cluster ${cluster} -p ${pool} snap rm ${image}@${snap} -} - -protect_snapshot() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local snap=$4 - - rbd --cluster ${cluster} -p ${pool} snap protect ${image}@${snap} -} - -unprotect_snapshot() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local snap=$4 - - rbd --cluster ${cluster} -p ${pool} snap unprotect ${image}@${snap} -} - -wait_for_snap_present() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local snap_name=$4 - local s - - for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do - sleep ${s} - rbd --cluster ${cluster} -p ${pool} info ${image}@${snap_name} || continue - return 0 - done - return 1 -} - -write_image() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local count=$4 - - rbd --cluster ${cluster} -p ${pool} bench-write ${image} \ - --io-size 4096 --io-threads 1 --io-total $((4096 * count)) \ - --io-pattern rand -} - -compare_images() -{ - local pool=$1 - local image=$2 - - local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export - local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export - - rm -f ${rmt_export} ${loc_export} - rbd --cluster ${CLUSTER2} -p ${pool} export ${image} ${rmt_export} - rbd --cluster ${CLUSTER1} -p ${pool} export ${image} ${loc_export} - cmp ${rmt_export} ${loc_export} -} - -demote_image() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - rbd --cluster=${cluster} mirror image demote ${pool}/${image} -} - -promote_image() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - rbd --cluster=${cluster} mirror image promote ${pool}/${image} -} - -set_pool_mirror_mode() -{ - local cluster=$1 - local pool=$2 - local mode=$3 - - rbd --cluster=${cluster} -p ${pool} mirror pool enable ${mode} -} - -disable_mirror() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - rbd --cluster=${cluster} mirror image disable ${pool}/${image} -} - -enable_mirror() -{ - local cluster=$1 - local pool=$2 - local image=$3 - - rbd --cluster=${cluster} mirror image enable ${pool}/${image} -} - -test_image_present() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local test_state=$4 - local current_state=deleted - - rbd --cluster=${cluster} -p ${pool} ls | grep "^${image}$" && - current_state=present - - test "${test_state}" = "${current_state}" -} - -wait_for_image_present() -{ - local cluster=$1 - local pool=$2 - local image=$3 - local state=$4 - local s - - # TODO: add a way to force rbd-mirror to update replayers - for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do - sleep ${s} - test_image_present "${cluster}" "${pool}" "${image}" "${state}" && return 0 - done - return 1 -} - -# -# Main -# - -if [ "$#" -gt 0 ] -then - if [ -z "${RBD_MIRROR_TEMDIR}" ] - then - echo "RBD_MIRROR_TEMDIR is not set" >&2 - exit 1 - fi - - TEMPDIR="${RBD_MIRROR_TEMDIR}" - cd ${TEMPDIR} - $@ - exit $? 
+if [ -n "${CEPH_REF}" ]; then + wget -O rbd_mirror_helpers.sh "https://git.ceph.com/?p=ceph.git;a=blob_plain;hb=$CEPH_REF;f=qa/workunits/rbd/rbd_mirror_helpers.sh" + . rbd_mirror_helpers.sh +else + . $(dirname $0)/rbd_mirror_helpers.sh fi -set -xe - -setup - testlog "TEST: add image and test replay" start_mirror ${CLUSTER1} image=test diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh new file mode 100755 index 0000000000000..8094bd8de427e --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -0,0 +1,683 @@ +#!/bin/sh +# +# rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions +# +# The scripts starts two ("local" and "remote") clusters using mstart.sh script, +# creates a temporary directory, used for cluster configs, daemon logs, admin +# socket, temporary files, and launches rbd-mirror daemon. +# +# There are several env variables useful when troubleshooting a test failure: +# +# RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes, +# destroy the clusters and remove the temp directory) +# on exit, so it is possible to check the test state +# after failure. +# RBD_MIRROR_TEMDIR - use this path when creating the temporary directory +# (should not exist) instead of running mktemp(1). +# +# The cleanup can be done as a separate step, running the script with +# `cleanup ${RBD_MIRROR_TEMDIR}' arguments. +# +# Note, as other workunits tests, rbd_mirror.sh expects to find ceph binaries +# in PATH. +# +# Thus a typical troubleshooting session: +# +# From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with +# TEMPDIR pointing to a known location: +# +# cd $CEPH_SRC_PATH +# PATH=$CEPH_SRC_PATH:$PATH +# RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ +# ../qa/workunits/rbd/rbd_mirror.sh +# +# After the test failure cd to TEMPDIR and check the current state: +# +# cd /tmp/tmp.rbd_mirror +# ls +# less rbd-mirror.cluster1_daemon.$pid.log +# ceph --cluster cluster1 -s +# ceph --cluster cluster1 -s +# rbd --cluster cluster2 -p mirror ls +# rbd --cluster cluster2 -p mirror journal status --image test +# ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help +# ... +# +# Also you can execute commands (functions) from the script: +# +# cd $CEPH_SRC_PATH +# export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror +# ../qa/workunits/rbd/rbd_mirror.sh status +# ../qa/workunits/rbd/rbd_mirror.sh stop_mirror cluster1 +# ../qa/workunits/rbd/rbd_mirror.sh start_mirror cluster2 +# ../qa/workunits/rbd/rbd_mirror.sh flush cluster2 +# ... +# +# Eventually, run the cleanup: +# +# cd $CEPH_SRC_PATH +# RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ +# ../qa/workunits/rbd/rbd_mirror.sh cleanup +# + +CLUSTER1=cluster1 +CLUSTER2=cluster2 +POOL=mirror +PARENT_POOL=mirror_parent +SRC_DIR=$(readlink -f $(dirname $0)/../../../src) +TEMPDIR= + +# These vars facilitate running this script in an environment with +# ceph installed from packages, like teuthology. These are not defined +# by default. +# +# RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters +# RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror +# running as ceph client $CEPH_ID. 
If empty, +# this script will start and stop rbd-mirror + +# +# Functions +# + +daemon_asok_file() +{ + local local_cluster=$1 + local cluster=$2 + + if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + echo $(ceph-conf --cluster $local_cluster --name "client.${CEPH_ID}" 'admin socket') + else + echo "${TEMPDIR}/rbd-mirror.${local_cluster}_daemon.${cluster}.asok" + fi +} + +daemon_pid_file() +{ + local cluster=$1 + + if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + echo $(ceph-conf --cluster $cluster --name "client.${CEPH_ID}" 'pid file') + else + echo "${TEMPDIR}/rbd-mirror.${cluster}_daemon.pid" + fi +} + +testlog() +{ + echo $(date '+%F %T') $@ | tee -a "${TEMPDIR}/rbd-mirror.test.log" +} + +setup() +{ + local c + trap cleanup INT TERM EXIT + + if [ -n "${RBD_MIRROR_TEMDIR}" ]; then + mkdir "${RBD_MIRROR_TEMDIR}" + TEMPDIR="${RBD_MIRROR_TEMDIR}" + else + TEMPDIR=`mktemp -d` + fi + + if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then + cd ${SRC_DIR} + ./mstart.sh ${CLUSTER1} -n + ./mstart.sh ${CLUSTER2} -n + + ln -s $(readlink -f run/${CLUSTER1}/ceph.conf) \ + ${TEMPDIR}/${CLUSTER1}.conf + ln -s $(readlink -f run/${CLUSTER2}/ceph.conf) \ + ${TEMPDIR}/${CLUSTER2}.conf + + cd ${TEMPDIR} + fi + + ceph --cluster ${CLUSTER1} osd pool create ${POOL} 64 64 + ceph --cluster ${CLUSTER1} osd pool create ${PARENT_POOL} 64 64 + ceph --cluster ${CLUSTER2} osd pool create ${PARENT_POOL} 64 64 + ceph --cluster ${CLUSTER2} osd pool create ${POOL} 64 64 + + rbd --cluster ${CLUSTER1} mirror pool enable ${POOL} pool + rbd --cluster ${CLUSTER2} mirror pool enable ${POOL} pool + rbd --cluster ${CLUSTER1} mirror pool enable ${PARENT_POOL} image + rbd --cluster ${CLUSTER2} mirror pool enable ${PARENT_POOL} image + + rbd --cluster ${CLUSTER1} mirror pool peer add ${POOL} ${CLUSTER2} + rbd --cluster ${CLUSTER2} mirror pool peer add ${POOL} ${CLUSTER1} + rbd --cluster ${CLUSTER1} mirror pool peer add ${PARENT_POOL} ${CLUSTER2} + rbd --cluster ${CLUSTER2} mirror pool peer add ${PARENT_POOL} ${CLUSTER1} +} + +cleanup() +{ + test -n "${RBD_MIRROR_NOCLEANUP}" && return + + set +e + + stop_mirror "${CLUSTER1}" + stop_mirror "${CLUSTER2}" + + if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then + cd ${SRC_DIR} + ./mstop.sh ${CLUSTER1} + ./mstop.sh ${CLUSTER2} + else + ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it + ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it + ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it + ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it + fi + rm -Rf ${TEMPDIR} +} + +start_mirror() +{ + local cluster=$1 + + test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return + + rbd-mirror \ + --cluster ${cluster} \ + --pid-file=$(daemon_pid_file "${cluster}") \ + --log-file=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.\$pid.log \ + --admin-socket=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.asok \ + --debug-rbd=30 --debug-journaler=30 \ + --debug-rbd_mirror=30 \ + --daemonize=true +} + +stop_mirror() +{ + local cluster=$1 + + test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return + + local pid + pid=$(cat $(daemon_pid_file "${cluster}") 2>/dev/null) || : + if [ -n "${pid}" ] + then + kill ${pid} + for s in 1 2 4 8 16 32; do + sleep $s + ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' && break + done + ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' + fi + rm -f $(daemon_asok_file "${cluster}" "${CLUSTER1}") + rm -f 
$(daemon_asok_file "${cluster}" "${CLUSTER2}") + rm -f $(daemon_pid_file "${cluster}") +} + +admin_daemon() +{ + local cluster=$1 ; shift + + local asok_file=$(daemon_asok_file "${cluster}" "${cluster}") + test -S "${asok_file}" + + ceph --admin-daemon ${asok_file} $@ +} + +status() +{ + local cluster daemon image_pool image + + for cluster in ${CLUSTER1} ${CLUSTER2} + do + echo "${cluster} status" + ceph --cluster ${cluster} -s + echo + + for image_pool in ${POOL} ${PARENT_POOL} + do + echo "${cluster} ${image_pool} images" + rbd --cluster ${cluster} -p ${image_pool} ls + echo + + echo "${cluster} ${image_pool} mirror pool status" + rbd --cluster ${cluster} -p ${image_pool} mirror pool status --verbose + echo + + for image in `rbd --cluster ${cluster} -p ${image_pool} ls 2>/dev/null` + do + echo "image ${image} info" + rbd --cluster ${cluster} -p ${image_pool} info ${image} + echo + echo "image ${image} journal status" + rbd --cluster ${cluster} -p ${image_pool} journal status --image ${image} + echo + done + done + done + + local ret + + for cluster in "${CLUSTER1}" "${CLUSTER2}" + do + local pid_file=$(daemon_pid_file ${cluster} ) + if [ ! -e ${pid_file} ] + then + echo "${cluster} rbd-mirror not running or unknown" \ + "(${pid_file} not exist)" + continue + fi + + local pid + pid=$(cat ${pid_file} 2>/dev/null) || : + if [ -z "${pid}" ] + then + echo "${cluster} rbd-mirror not running or unknown" \ + "(can't find pid using ${pid_file})" + ret=1 + continue + fi + + echo "${daemon} rbd-mirror process in ps output:" + if ps auxww | + awk -v pid=${pid} 'NR == 1 {print} $2 == pid {print; exit 1}' + then + echo + echo "${cluster} rbd-mirror not running" \ + "(can't find pid $pid in ps output)" + ret=1 + continue + fi + echo + + local asok_file=$(daemon_asok_file ${cluster} ${cluster}) + if [ ! 
-S "${asok_file}" ] + then + echo "${cluster} rbd-mirror asok is unknown (${asok_file} not exits)" + ret=1 + continue + fi + + echo "${cluster} rbd-mirror status" + ceph --admin-daemon ${asok_file} rbd mirror status + echo + done + + return ${ret} +} + +flush() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local cmd="rbd mirror flush" + + if [ -n "${image}" ] + then + cmd="${cmd} ${pool}/${image}" + fi + + admin_daemon "${cluster}" ${cmd} +} + +test_image_replay_state() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local test_state=$4 + local current_state=stopped + + admin_daemon "${cluster}" help | + fgrep "\"rbd mirror status ${pool}/${image}\"" && + admin_daemon "${cluster}" rbd mirror status ${pool}/${image} | + grep -i 'state.*Replaying' && + current_state=started + + test "${test_state}" = "${current_state}" +} + +wait_for_image_replay_state() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state=$4 + local s + + # TODO: add a way to force rbd-mirror to update replayers + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + test_image_replay_state "${cluster}" "${pool}" "${image}" "${state}" && return 0 + done + return 1 +} + +wait_for_image_replay_started() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + wait_for_image_replay_state "${cluster}" "${pool}" "${image}" started +} + +wait_for_image_replay_stopped() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + wait_for_image_replay_state "${cluster}" "${pool}" "${image}" stopped +} + +get_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local id_regexp=$4 + + # Parse line like below, looking for the first position + # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]] + + local status_log=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.status + rbd --cluster ${cluster} -p ${pool} journal status --image ${image} | + tee ${status_log} >&2 + sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*$/\1/p' \ + ${status_log} +} + +get_master_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + get_position "${cluster}" "${pool}" "${image}" '' +} + +get_mirror_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + get_position "${cluster}" "${pool}" "${image}" '..*' +} + +wait_for_replay_complete() +{ + local local_cluster=$1 + local cluster=$2 + local pool=$3 + local image=$4 + local s master_pos mirror_pos + + for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do + sleep ${s} + flush "${local_cluster}" "${pool}" "${image}" + master_pos=$(get_master_position "${cluster}" "${pool}" "${image}") + mirror_pos=$(get_mirror_position "${cluster}" "${pool}" "${image}") + test -n "${master_pos}" -a "${master_pos}" = "${mirror_pos}" && return 0 + done + return 1 +} + +test_status_in_pool_dir() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state_pattern=$4 + local description_pattern=$5 + + local status_log=${TEMPDIR}/${cluster}-${image}.mirror_status + rbd --cluster ${cluster} -p ${pool} mirror image status ${image} | + tee ${status_log} + grep "state: .*${state_pattern}" ${status_log} + grep "description: .*${description_pattern}" ${status_log} +} + +create_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} -p ${pool} create --size 128 \ + --image-feature layering,exclusive-lock,journaling ${image} +} + +remove_image() +{ + 
local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} -p ${pool} rm ${image} +} + +remove_image_retry() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + for s in 1 2 4 8 16 32; do + remove_image ${cluster} ${pool} ${image} && return 0 + sleep ${s} + done + return 1 +} + +clone_image() +{ + local cluster=$1 + local parent_pool=$2 + local parent_image=$3 + local parent_snap=$4 + local clone_pool=$5 + local clone_image=$6 + + rbd --cluster ${cluster} clone ${parent_pool}/${parent_image}@${parent_snap} \ + ${clone_pool}/${clone_image} --image-feature layering,exclusive-lock,journaling +} + +create_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} -p ${pool} snap create ${image}@${snap} +} + +remove_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} -p ${pool} snap rm ${image}@${snap} +} + +purge_snapshots() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} -p ${pool} snap purge ${image} +} + +protect_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} -p ${pool} snap protect ${image}@${snap} +} + +unprotect_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} -p ${pool} snap unprotect ${image}@${snap} +} + +wait_for_snap_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap_name=$4 + local s + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do + sleep ${s} + rbd --cluster ${cluster} -p ${pool} info ${image}@${snap_name} || continue + return 0 + done + return 1 +} + +write_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local count=$4 + + rbd --cluster ${cluster} -p ${pool} bench-write ${image} \ + --io-size 4096 --io-threads 1 --io-total $((4096 * count)) \ + --io-pattern rand +} + +stress_write_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local duration=$(awk 'BEGIN {srand(); print int(35 * rand()) + 15}') + + timeout ${duration}s ceph_test_rbd_mirror_random_write \ + --cluster ${cluster} ${pool} ${image} \ + --debug-rbd=20 --debug-journaler=20 \ + 2> ${TEMPDIR}/rbd-mirror-random-write.log || true +} + +compare_images() +{ + local pool=$1 + local image=$2 + + local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export + local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export + + rm -f ${rmt_export} ${loc_export} + rbd --cluster ${CLUSTER2} -p ${pool} export ${image} ${rmt_export} + rbd --cluster ${CLUSTER1} -p ${pool} export ${image} ${loc_export} + cmp ${rmt_export} ${loc_export} + rm -f ${rmt_export} ${loc_export} +} + +demote_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} mirror image demote ${pool}/${image} +} + +promote_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} mirror image promote ${pool}/${image} +} + +set_pool_mirror_mode() +{ + local cluster=$1 + local pool=$2 + local mode=$3 + + rbd --cluster=${cluster} -p ${pool} mirror pool enable ${mode} +} + +disable_mirror() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} mirror image disable ${pool}/${image} +} + +enable_mirror() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} mirror image enable ${pool}/${image} +} + +test_image_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + 
local test_state=$4 + local current_state=deleted + + rbd --cluster=${cluster} -p ${pool} ls | grep "^${image}$" && + current_state=present + + test "${test_state}" = "${current_state}" +} + +wait_for_image_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state=$4 + local s + + # TODO: add a way to force rbd-mirror to update replayers + for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32; do + sleep ${s} + test_image_present "${cluster}" "${pool}" "${image}" "${state}" && return 0 + done + return 1 +} + +# +# Main +# + +if [ "$#" -gt 0 ] +then + if [ -z "${RBD_MIRROR_TEMDIR}" ] + then + echo "RBD_MIRROR_TEMDIR is not set" >&2 + exit 1 + fi + + TEMPDIR="${RBD_MIRROR_TEMDIR}" + cd ${TEMPDIR} + $@ + exit $? +fi + +set -xe + +setup diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh index 762e98db76400..1a234428da5ba 100755 --- a/qa/workunits/rbd/rbd_mirror_stress.sh +++ b/qa/workunits/rbd/rbd_mirror_stress.sh @@ -1,412 +1,149 @@ -#!/bin/bash +#!/bin/sh # -# rbd_mirror.sh - test rbd-mirror daemon +# rbd_mirror_stress.sh - stress test rbd-mirror daemon # -# The scripts starts two ("local" and "remote") clusters using mstart.sh script, -# creates a temporary directory, used for cluster configs, daemon logs, admin -# socket, temporary files, and launches rbd-mirror daemon. -# -# There are several env variables useful when troubleshooting a test failure: -# -# RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes, -# destroy the clusters and remove the temp directory) -# on exit, so it is possible to check the test state -# after failure. -# RBD_MIRROR_TEMDIR - use this path when creating the temporary directory -# (should not exist) instead of running mktemp(1). -# -# The cleanup can be done as a separate step, running the script with -# `cleanup ${RBD_MIRROR_TEMDIR}' arguments. -# -# Note, as other workunits tests, rbd_mirror.sh expects to find ceph binaries -# in PATH. -# -# Thus a typical troubleshooting session: -# -# From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with -# TEMPDIR pointing to a known location: -# -# cd $CEPH_SRC_PATH -# PATH=$CEPH_SRC_PATH:$PATH -# RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ -# ../qa/workunits/rbd/rbd_mirror.sh -# -# After the test failure cd to TEMPDIR and check the current state: -# -# cd /tmp/tmp.rbd_mirror -# ls -# less rbd-mirror.cluster1_daemon.$pid.log -# ceph --cluster cluster1 -s -# ceph --cluster cluster1 -s -# rbd --cluster cluster2 -p mirror ls -# rbd --cluster cluster2 -p mirror journal status --image test -# ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help -# ... -# -# Also you can execute commands (functions) from the script: -# -# cd $CEPH_SRC_PATH -# export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror -# ../qa/workunits/rbd/rbd_mirror.sh status -# ../qa/workunits/rbd/rbd_mirror.sh stop_mirror cluster1 -# ../qa/workunits/rbd/rbd_mirror.sh start_mirror cluster2 -# ../qa/workunits/rbd/rbd_mirror.sh flush cluster2 -# ... -# -# Eventually, run the cleanup: -# -# cd $CEPH_SRC_PATH -# RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ -# ../qa/workunits/rbd/rbd_mirror.sh cleanup -# - -CLUSTER1=cluster1 -CLUSTER2=cluster2 -POOL=mirror -SRC_DIR=$(readlink -f $(dirname $0)/../../../src) -TEMPDIR= - -# These vars facilitate running this script in an environment with -# ceph installed from packages, like teuthology. These are not defined -# by default. 
-# -# RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters -# RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror -# running as ceph client $CEPH_ID. If empty, -# this script will start and stop rbd-mirror -# -# Functions -# - -daemon_asok_file() -{ - local local_cluster=$1 - local cluster=$2 - - if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - echo $(ceph-conf --cluster $local_cluster --name "client.${CEPH_ID}" 'admin socket') - else - echo "${TEMPDIR}/rbd-mirror.${local_cluster}_daemon.${cluster}.asok" - fi -} - -daemon_pid_file() -{ - local cluster=$1 - - if [ -n "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - echo $(ceph-conf --cluster $cluster --name "client.${CEPH_ID}" 'pid file') - else - echo "${TEMPDIR}/rbd-mirror.${cluster}_daemon.pid" - fi -} - -testlog() -{ - echo $(date '+%F %T') $@ | tee -a "${TEMPDIR}/rbd-mirror.test.log" -} - -setup() -{ - local c - trap cleanup INT TERM EXIT - - if [ -n "${RBD_MIRROR_TEMDIR}" ]; then - mkdir "${RBD_MIRROR_TEMDIR}" - TEMPDIR="${RBD_MIRROR_TEMDIR}" - else - TEMPDIR=`mktemp -d` - fi - - if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then - cd ${SRC_DIR} - ./mstart.sh ${CLUSTER1} -n - ./mstart.sh ${CLUSTER2} -n - - ln -s $(readlink -f run/${CLUSTER1}/ceph.conf) \ - ${TEMPDIR}/${CLUSTER1}.conf - ln -s $(readlink -f run/${CLUSTER2}/ceph.conf) \ - ${TEMPDIR}/${CLUSTER2}.conf - - cd ${TEMPDIR} - fi - - ceph --cluster ${CLUSTER1} osd pool create ${POOL} 64 64 - ceph --cluster ${CLUSTER2} osd pool create ${POOL} 64 64 - - rbd --cluster ${CLUSTER1} mirror pool enable ${POOL} pool - rbd --cluster ${CLUSTER2} mirror pool enable ${POOL} pool - rbd --cluster ${CLUSTER1} mirror pool peer add ${POOL} ${CLUSTER2} - rbd --cluster ${CLUSTER2} mirror pool peer add ${POOL} ${CLUSTER1} -} - -cleanup() -{ - test -n "${RBD_MIRROR_NOCLEANUP}" && return - - set +e - - stop_mirror "${CLUSTER1}" - stop_mirror "${CLUSTER2}" - - if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then - cd ${SRC_DIR} - ./mstop.sh ${CLUSTER1} - ./mstop.sh ${CLUSTER2} - else - ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it - ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it - fi - rm -Rf ${TEMPDIR} -} - -start_mirror() -{ - local cluster=$1 +IMAGE_COUNT=50 +export LOCKDEP=0 - test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return - - rbd-mirror \ - --cluster ${cluster} \ - --pid-file=$(daemon_pid_file "${cluster}") \ - --log-file=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.\$pid.log \ - --admin-socket=${TEMPDIR}/rbd-mirror.${cluster}_daemon.\$cluster.asok \ - --debug-rbd=30 --debug-journaler=30 \ - --debug-rbd_mirror=30 \ - --daemonize=true -} - -stop_mirror() -{ - local cluster=$1 - - test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return - - local pid - pid=$(cat $(daemon_pid_file "${cluster}") 2>/dev/null) || : - if [ -n "${pid}" ] - then - kill ${pid} - for s in 1 2 4 8 16 32; do - sleep $s - ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' && break - done - ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' - fi - rm -f $(daemon_asok_file "${cluster}" "${CLUSTER1}") - rm -f $(daemon_asok_file "${cluster}" "${CLUSTER2}") - rm -f $(daemon_pid_file "${cluster}") -} - -admin_daemon() -{ - local cluster=$1 ; shift - - local asok_file=$(daemon_asok_file "${cluster}" "${cluster}") - test -S "${asok_file}" - - ceph --admin-daemon ${asok_file} $@ -} +if [ -n "${CEPH_REF}" ]; then + wget -O rbd_mirror_helpers.sh 
"https://git.ceph.com/?p=ceph.git;a=blob_plain;hb=$CEPH_REF;f=qa/workunits/rbd/rbd_mirror_helpers.sh" + . rbd_mirror_helpers.sh +else + . $(dirname $0)/rbd_mirror_helpers.sh +fi -flush() +create_snap() { local cluster=$1 - local image=$2 - local cmd="rbd mirror flush" - - if [ -n "${image}" ] - then - cmd="${cmd} ${POOL}/${image}" - fi + local pool=$2 + local image=$3 + local snap_name=$4 - admin_daemon "${cluster}" ${cmd} + rbd --cluster ${cluster} -p ${pool} snap create ${image}@${snap_name} \ + --debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-snap-create.log } -test_image_replay_state() +compare_image_snaps() { - local cluster=$1 + local pool=$1 local image=$2 - local test_state=$3 - local current_state=stopped + local snap_name=$3 - admin_daemon "${cluster}" help | - fgrep "\"rbd mirror status ${POOL}/${image}\"" && - admin_daemon "${cluster}" rbd mirror status ${POOL}/${image} | - grep -i 'state.*Replaying' && - current_state=started + local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export + local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export - test "${test_state}" = "${current_state}" + rm -f ${rmt_export} ${loc_export} + rbd --cluster ${CLUSTER2} -p ${pool} export ${image}@${snap_name} ${rmt_export} + rbd --cluster ${CLUSTER1} -p ${pool} export ${image}@${snap_name} ${loc_export} + cmp ${rmt_export} ${loc_export} } -wait_for_image_replay_state() +wait_for_pool_healthy() { local cluster=$1 - local image=$2 - local state=$3 + local pool=$2 + local image_count=$3 local s + local count + local state - # TODO: add a way to force rbd-mirror to update replayers - for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do - sleep ${s} - test_image_replay_state "${cluster}" "${image}" "${state}" && return 0 + for s in `seq 1 40`; do + sleep 30 + count=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'images: ') + test "${count}" = "images: ${image_count} total" || continue + + state=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'health:') + test "${state}" = "health: ERROR" && return 1 + test "${state}" = "health: OK" && return 0 done return 1 } -wait_for_image_replay_started() -{ - local cluster=$1 - local image=$2 - - wait_for_image_replay_state "${cluster}" "${image}" started -} - -get_position() -{ - local cluster=$1 - local image=$2 - local id_regexp=$3 - - # Parse line like below, looking for the first position - # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]] - - local status_log=${TEMPDIR}/${CLUSTER2}-${POOL}-${image}.status - rbd --cluster ${cluster} -p ${POOL} journal status --image ${image} | - tee ${status_log} >&2 - sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*$/\1/p' \ - ${status_log} -} - -get_master_position() -{ - local cluster=$1 - local image=$2 - - get_position "${cluster}" "${image}" '' -} - -get_mirror_position() -{ - local cluster=$1 - local image=$2 - - get_position "${cluster}" "${image}" '..*' -} - -test_status_in_pool_dir() -{ - local cluster=$1 - local image=$2 - local state_pattern=$3 - local description_pattern=$4 - - local status_log=${TEMPDIR}/${cluster}-${image}.mirror_status - rbd --cluster ${cluster} -p ${POOL} mirror image status ${image} | - tee ${status_log} - grep "state: .*${state_pattern}" ${status_log} - grep "description: .*${description_pattern}" ${status_log} -} - -create_image() -{ - local cluster=$1 - local image=$2 - local 
size=$3 - - rbd --cluster ${cluster} -p ${POOL} create --size ${size} \ - --image-feature exclusive-lock --image-feature journaling ${image} -} - -write_image() -{ - local cluster=$1 - local image=$2 - local duration=$(($RANDOM % 35 + 15)) - - timeout ${duration}s ceph_test_rbd_mirror_random_write \ - --cluster ${cluster} ${POOL} ${image} \ - --debug-rbd=20 --debug-journaler=20 \ - 2> ${TEMPDIR}/rbd-mirror-random-write.log || true -} +start_mirror ${CLUSTER1} +start_mirror ${CLUSTER2} -create_snap() -{ - local cluster=$1 - local image=$2 - local snap_name=$3 +testlog "TEST: add image and test replay after client crashes" +image=test +create_image ${CLUSTER2} ${POOL} ${image} '512M' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - rbd --cluster ${cluster} -p ${POOL} snap create ${image}@${snap_name} \ - --debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-snap-create.log -} +for i in `seq 1 10` +do + stress_write_image ${CLUSTER2} ${POOL} ${image} -wait_for_snap() -{ - local cluster=$1 - local image=$2 - local snap_name=$3 - local s + test_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position' - for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do - sleep ${s} - rbd --cluster ${cluster} -p ${POOL} info ${image}@${snap_name} || continue - return 0 - done - return 1 -} + snap_name="snap${i}" + create_snap ${CLUSTER2} ${POOL} ${image} ${snap_name} + wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name} + compare_image_snaps ${POOL} ${image} ${snap_name} +done -compare_images() -{ - local image=$1 - local snap_name=$2 +for i in `seq 1 10` +do + snap_name="snap${i}" + remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name} +done - local rmt_export=${TEMPDIR}/${CLUSTER2}-${POOL}-${image}.export - local loc_export=${TEMPDIR}/${CLUSTER1}-${POOL}-${image}.export +remove_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' - rm -f ${rmt_export} ${loc_export} - rbd --cluster ${CLUSTER2} -p ${POOL} export ${image}@${snap_name} ${rmt_export} - rbd --cluster ${CLUSTER1} -p ${POOL} export ${image}@${snap_name} ${loc_export} - cmp ${rmt_export} ${loc_export} -} +testlog "TEST: create many images" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + create_image ${CLUSTER2} ${POOL} ${image} '128M' + write_image ${CLUSTER2} ${POOL} ${image} 100 +done -# -# Main -# +wait_for_pool_healthy ${CLUSTER2} ${POOL} ${IMAGE_COUNT} +wait_for_pool_healthy ${CLUSTER1} ${POOL} ${IMAGE_COUNT} -if [ "$#" -gt 0 ] -then - if [ -z "${RBD_MIRROR_TEMDIR}" ] - then - echo "RBD_MIRROR_TEMDIR is not set" >&2 - exit 1 - fi +testlog "TEST: compare many images" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + compare_images ${POOL} ${image} +done - TEMPDIR="${RBD_MIRROR_TEMDIR}" - cd ${TEMPDIR} - $@ - exit $? 
-fi +testlog "TEST: delete many images" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + remove_image ${CLUSTER2} ${POOL} ${image} +done -set -xe +testlog "TEST: image deletions should propagate" +wait_for_pool_healthy ${CLUSTER1} ${POOL} 0 +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +done -setup +testlog "TEST: delete images during bootstrap" +set_pool_mirror_mode ${CLUSTER1} ${POOL} 'image' +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' -testlog "TEST: add image and test replay" start_mirror ${CLUSTER1} image=test -create_image ${CLUSTER2} ${image} '512M' -wait_for_image_replay_started ${CLUSTER1} ${image} for i in `seq 1 10` do - write_image ${CLUSTER2} ${image} + image="image_${i}" + create_image ${CLUSTER2} ${POOL} ${image} '512M' + enable_mirror ${CLUSTER2} ${POOL} ${image} - test_status_in_pool_dir ${CLUSTER1} ${image} 'up+replaying' 'master_position' + stress_write_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' - snap_name="snap${i}" - create_snap ${CLUSTER2} ${image} ${snap_name} - wait_for_snap ${CLUSTER1} ${image} ${snap_name} - compare_images ${image} ${snap_name} + disable_mirror ${CLUSTER2} ${POOL} ${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' + purge_snapshots ${CLUSTER2} ${POOL} ${image} + remove_image_retry ${CLUSTER2} ${POOL} ${image} done echo OK -- 2.39.5
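
Note (outside the patch, for reviewers): the troubleshooting workflow documented at the
top of the new qa/workunits/rbd/rbd_mirror_helpers.sh should apply equally to
rbd_mirror_stress.sh, since both test scripts source the same helpers and the helpers'
"Main" block dispatches any positional arguments as function calls. A minimal sketch,
assuming a locally built Ceph tree at $CEPH_SRC_PATH with the binaries on PATH (the
temp directory path below is only an example):

  # Run the stress test, keeping all state on failure for later inspection.
  cd $CEPH_SRC_PATH
  PATH=$CEPH_SRC_PATH:$PATH
  RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
      ../qa/workunits/rbd/rbd_mirror_stress.sh

  # Helper functions can then be invoked individually against the saved state,
  # e.g. to dump cluster/pool/daemon status or flush a mirror daemon.
  export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror
  ../qa/workunits/rbd/rbd_mirror_stress.sh status
  ../qa/workunits/rbd/rbd_mirror_stress.sh flush cluster2

  # Tear the clusters down and remove the temp directory when finished.
  ../qa/workunits/rbd/rbd_mirror_stress.sh cleanup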