From: David Zafman Date: Mon, 2 Oct 2017 20:51:17 +0000 (-0700) Subject: test: Test case that reproduces tracker 18162 X-Git-Tag: v12.2.3~146^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a09a42a4a850ee6f5e64fc0dc79995dbbfbdf13d;p=ceph.git test: Test case that reproduces tracker 18162 recover_replicas: object added to missing set for backfill, but is not in recovering, error! Signed-off-by: David Zafman (cherry picked from commit b9de5eec267627c8bc5ff0759ddea6c4a8aa7bce) --- diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh index 32016f3aa090..2f8769abb074 100755 --- a/qa/standalone/erasure-code/test-erasure-eio.sh +++ b/qa/standalone/erasure-code/test-erasure-eio.sh @@ -56,6 +56,13 @@ function setup_osds() { grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1 } +function get_state() { + local pgid=$1 + local sname=state + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".[] | select(.pgid==\"$pgid\") | .$sname" +} + function create_erasure_coded_pool() { local poolname=$1 shift @@ -408,6 +415,158 @@ function TEST_ec_recovery_errors() { delete_pool $poolname } +# Test backfill with errors present +function TEST_ec_backfill_errors() { + local dir=$1 + local objname=myobject + local lastobj=300 + # Must be between 1 and $lastobj + local testobj=obj250 + + export CEPH_ARGS + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 5 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio ec data $poolname $testobj $dir 0 || return 1 + inject_eio ec data $poolname $testobj $dir 1 || return 1 + + run_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + while(true); do + state=$(get_state 2.0) + echo $state | grep -v backfilling + if [ "$?" = "0" ]; then + break + fi + echo -n "$state " + done + + ceph pg dump pgs + ceph pg 2.0 list_missing | grep -q $testobj || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_pool $poolname +} + +# Test recovery with errors present +function TEST_ec_recovery_errors() { + local dir=$1 + local objname=myobject + local lastobj=100 + # Must be between 1 and $lastobj + local testobj=obj75 + + setup_osds 5 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio ec data $poolname $testobj $dir 0 || return 1 + inject_eio ec data $poolname $testobj $dir 1 || return 1 + + run_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + while(true); do + state=$(get_state 2.0) + echo $state | grep -v recovering + if [ "$?" = "0" ]; then + break + fi + echo -n "$state " + done + + ceph pg dump pgs + ceph pg 2.0 list_missing | grep -q $testobj || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_pool $poolname +} + main test-erasure-eio "$@" # Local Variables: