test: test_lost.sh: update tests

author Mykola Golub <mgolub@mirantis.com>

Mon, 23 Mar 2015 09:41:03 +0000 (11:41 +0200)

committer Mykola Golub <mgolub@mirantis.com>

Wed, 25 Mar 2015 07:32:03 +0000 (09:32 +0200)
author Mykola Golub <mgolub@mirantis.com>
Mon, 23 Mar 2015 09:41:03 +0000 (11:41 +0200)
committer Mykola Golub <mgolub@mirantis.com>
Wed, 25 Mar 2015 07:32:03 +0000 (09:32 +0200)
diff --git a/src/test/test_common.sh b/src/test/test_common.sh

index 5fa2e4032521d5127fc3d1a797637d0feb92915b..5ab89f1b93f6176f7aac934da61f33390e202454 100755 (executable)
--- a/src/test/test_common.sh
+++ b/src/test/test_common.sh
@@ -38,6 +38,21 @@ die() {
          exit 1
  }
  
+# is_set FLAG [ITEM...]: return 0 if FLAG equals one of the ITEMs, else 1.
+# Iterates "$@" directly so items are not re-split or glob-expanded.
+is_set()
+{
+       local flag=$1; shift
+       local i
+
+       for i in "$@"; do
+               if [ "${flag}" = "${i}" ]; then
+                       return 0
+               fi
+       done
+       return 1
+}
+
  # Stop an OSD started by vstart
  stop_osd() {
          osd_index=$1
diff --git a/src/test/test_lost.sh b/src/test/test_lost.sh

index 2fb7587c2887b153368c112e5591de9a8daedaa8..4a976c03c3968e133040500f36a502e2632af3d7 100755 (executable)
--- a/src/test/test_lost.sh
+++ b/src/test/test_lost.sh
@@ -19,6 +19,26 @@ setup() {
  
          # set recovery start to a really long time to ensure that we don't start recovery
          ./vstart.sh -d -n -o "$vstart_config" || die "vstart failed"
+
+       # for existing pools set size not greater than number of OSDs,
+       # so recovery from degraded pgs is possible
+       local changed=0
+       for pool in `./ceph osd pool ls`; do
+           local size=`./ceph osd pool get ${pool} size | awk '{print $2}'`
+           if [ "${size}" -gt "${CEPH_NUM_OSD}" ]; then
+               ./ceph osd pool set ${pool} size ${CEPH_NUM_OSD}
+               changed=1
+           fi
+       done
+       if [ ${changed} -eq 1 ]; then
+           # XXX: When a pool has degraded pgs due to size greater than number
+           # of OSDs, after decreasing the size recovery can still get stuck
+           # and require an additional kick.
+           ./ceph osd out 0
+           ./ceph osd in 0
+       fi
+
+       poll_cmd "./ceph health" HEALTH_OK 1 30
  }
  
  recovery1_impl() {
@@ -64,7 +84,15 @@ recovery1() {
          recovery1_impl
  }
  
-make_unfound() {
+lost1_impl() {
+       local flags="$@"
+       local lost_action=delete
+       local pgs_unfound pg
+
+       if is_set revert_lost $flags; then
+           lost_action=revert
+       fi
+
          # Write lots and lots of objects
          write_objects 1 1 20 8000 $TEST_POOL
  
@@ -88,14 +116,21 @@ make_unfound() {
         # Since recovery can't proceed, stuff should be unfound.
         poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 3 120
          [ $? -eq 1 ] || die "Failed to see unfound objects."
-}
  
-lost1_impl() {
-       try_to_fetch_unfound=$1
+       pgs_unfound=`./ceph health detail |awk '$1 = "pg" && /[0-9] unfound$/ {print $2}'`
  
-       make_unfound
+       [ -n "$pgs_unfound" ] || die "no pg with unfound objects"
  
-       if [ "$try_to_fetch_unfound" -eq 1 ]; then
+       for pg in $pgs_unfound; do
+           ./ceph pg $pg mark_unfound_lost revert &&
+           die "mark_unfound_lost unexpectedly succeeded for pg $pg"
+       done
+
+       if ! is_set mark_osd_lost $flags && ! is_set rm_osd $flags; then
+           return
+       fi
+
+       if is_set try_to_fetch_unfound $flags; then
           # Ask for an object while it's still unfound, and
           # verify we get woken to an error when it's declared lost.
           echo "trying to get one of the unfound objects"
@@ -105,19 +140,43 @@ lost1_impl() {
           ) &
         fi
  
-        # Lose all objects.
-       ./ceph osd lost 0 --yes-i-really-mean-it
+       if is_set mark_osd_lost $flags; then
+         ./ceph osd lost 0 --yes-i-really-mean-it
+       fi
+
+       if is_set rm_osd $flags; then
+           ./ceph osd rm 0
+       fi
+
+       if ! is_set auto_mark_unfound_lost $flags; then
+           for pg in $pgs_unfound; do
+               ./ceph pg $pg mark_unfound_lost ${lost_action} ||
+                 die "mark_unfound_lost failed for pg $pg"
+           done
+       fi
+
+       start_recovery 2
  
         # Unfound objects go away and are turned into lost objects.
         poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
          [ $? -eq 1 ] || die "Unfound objects didn't go away."
  
+       for pg in `ceph pg ls | awk '/^[0-9]/ {print $1}'`; do
+           ./ceph pg $pg mark_unfound_lost revert 2>&1 |
+             grep 'pg has no unfound objects' ||
+             die "pg $pg has unfound objects"
+       done
+
         # Reading from a lost object gives back an error code.
         # TODO: check error code
-       ./rados -c ./ceph.conf -p $TEST_POOL get obj01 $TEMPDIR/obj01 &&\
+       # Reading a deleted object must fail; reading a reverted one must succeed.
+       if ./rados -c ./ceph.conf -p $TEST_POOL get obj01 $TEMPDIR/obj01; then
+         [ "$lost_action" != delete ] || die "expected radostool error"
+       else
+         [ "$lost_action" != revert ] || die "unexpected radostool error"
+       fi
  
-       if [ "$try_to_fetch_unfound" -eq 1 ]; then
+       if is_set try_to_fetch_unfound $flags; then
           echo "waiting for the try_to_fetch_unfound \
  radostool instance to finish"
           wait
@@ -126,77 +185,31 @@ radostool instance to finish"
  
  lost1() {
          setup 2 'osd recovery delay start = 10000'
-        lost1_impl 0
+        lost1_impl mark_osd_lost revert_lost
  }
  
  lost2() {
          setup 2 'osd recovery delay start = 10000'
-        lost1_impl 1
+        lost1_impl mark_osd_lost try_to_fetch_unfound
  }
  
-mark_unfound_lost1_impl() {
-       local mark_osd_lost=$1
-       local rm_osd=$2
-       local pgs_unfound pg
-
-       make_unfound
-
-       pgs_unfound=`./ceph health detail |awk '$1 = "pg" && /[0-9] unfound$/ {print $2}'`
-
-       [ -n "$pgs_unfound" ] || die "no pg with unfound objects"
-
-       for pg in $pgs_unfound; do
-           ceph pg $pg mark_unfound_lost revert &&
-             die "mark_unfound_lost unexpectedly succeeded for pg $pg"
-       done
-
-       if [ "$mark_osd_lost" -ne 1 -a "$rm_osd" -ne 1 ]; then
-           return
-       fi
-
-       if [ "$mark_osd_lost" -eq 1 ]; then
-         ./ceph osd lost 0 --yes-i-really-mean-it
-       fi
-
-       if [ "$rm_osd" -eq 1 ]; then
-           ./ceph osd rm 0
-       fi
-
-       for pg in $pgs_unfound; do
-           ceph pg $pg mark_unfound_lost revert ||
-             die "mark_unfound_lost failed for pg $pg"
-       done
-
-       start_recovery 2
-
-       # Unfound objects go away and are turned into lost objects.
-       poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
-        [ $? -eq 1 ] || die "Unfound objects didn't go away."
-
-       for pg in `ceph pg ls | awk '/^[0-9]/ {print $1}'`; do
-           ceph pg $pg mark_unfound_lost revert 2>&1 |
-             grep 'pg has no unfound objects' ||
-             die "pg $pg has unfound objects"
-       done
-}
-
-mark_unfound_lost1() {
+lost3() {
          setup 2 'osd recovery delay start = 10000'
-        mark_unfound_lost1_impl 1 1
+        lost1_impl rm_osd
  }
  
-mark_unfound_lost2() {
+lost4() {
          setup 2 'osd recovery delay start = 10000'
-        mark_unfound_lost1_impl 1 0
+        lost1_impl mark_osd_lost rm_osd
  }
  
-mark_unfound_lost3() {
+lost5() {
          setup 2 'osd recovery delay start = 10000'
-        mark_unfound_lost1_impl 0 1
+        lost1_impl mark_osd_lost auto_mark_unfound_lost
  }
  
  all_osds_die_impl() {
-        poll_cmd "./ceph osd stat -o -" '3 up, 3 in' 20 240
+        poll_cmd "./ceph osd stat" '3 up, 3 in' 20 240
          [ $? -eq 1 ] || die "didn't start 3 osds"
  
          stop_osd 0
@@ -204,7 +217,7 @@ all_osds_die_impl() {
          stop_osd 2
  
         # wait for the MOSDPGStat timeout
-        poll_cmd "./ceph osd stat -o -" '0 up' 20 240
+        poll_cmd "./ceph osd stat" '0 up' 20 240
          [ $? -eq 1 ] || die "all osds weren't marked as down"
  }
  
@@ -221,15 +234,24 @@ run() {
  
          lost1 || die "test failed"
  
-        lost2 || die "test failed"
+       # XXX: try_to_fetch_unfound test currently hangs on "waiting for the
+       # try_to_fetch_unfound radostool instance to finish"
+       #lost2 || die "test failed"
  
-       mark_unfound_lost1 || die "test failed"
+       lost3 || die "test failed"
  
-       mark_unfound_lost2 || die "test failed"
+       lost4 || die "test failed"
  
-       mark_unfound_lost3 || die "test failed"
+       # XXX: automatically marking lost is not implemented
+       #lost5 || die "test failed"
  
          all_osds_die || die "test failed"
  }
  
+if [ $# -eq 0 ]; then   # no test named on the command line: run the whole suite
+       run
+       echo OK
+       exit 0
+fi
+
  $@
author	Mykola Golub <mgolub@mirantis.com>
	Mon, 23 Mar 2015 09:41:03 +0000 (11:41 +0200)
committer	Mykola Golub <mgolub@mirantis.com>
	Wed, 25 Mar 2015 07:32:03 +0000 (09:32 +0200)
src/test/test_common.sh		patch \| blob \| history
src/test/test_lost.sh		patch \| blob \| history