From: xie xingguo Date: Tue, 2 Apr 2019 08:17:52 +0000 (+0800) Subject: qa: add new test case for pulling error X-Git-Tag: v14.2.2~169^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0cc5c3aa27e9c9c5502dbc19bf6910d9643b221e;p=ceph.git qa: add new test case for pulling error Signed-off-by: xie xingguo (cherry picked from commit 6a8aedc1074d487510d8e546ec9e70e169523008) --- diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh index af4dfa3370bb..adf6fc7967a0 100755 --- a/qa/standalone/osd/osd-rep-recov-eio.sh +++ b/qa/standalone/osd/osd-rep-recov-eio.sh @@ -122,15 +122,27 @@ function rados_get_data() { COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") test "$COUNT" = "1" || return 1 + local object_osds=($(get_osds $poolname $objname)) + local primary=${object_osds[0]} + local bad_peer=${object_osds[1]} inject_$inject rep data $poolname $objname $dir 0 || return 1 inject_$inject rep data $poolname $objname $dir 1 || return 1 + # Force primary to pull from the bad peer, so we can repair it too! + set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1 + rados_get $dir $poolname $objname || return 1 + + # Wait until automatic repair of bad peer is done + wait_for_clean || return 1 + + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 2 || return 1 rados_get $dir $poolname $objname || return 1 COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') - test "$COUNT" = "2" || return 1 + test "$COUNT" = "3" || return 1 flush_pg_stats COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") - test "$COUNT" = "2" || return 1 + test "$COUNT" = "4" || return 1 inject_$inject rep data $poolname $objname $dir 0 || return 1 inject_$inject rep data $poolname $objname $dir 1 || return 1 @@ -139,10 +151,10 @@ function rados_get_data() { # After hang another repair couldn't happen, so count stays the same COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') - test "$COUNT" = "2" || return 1 + test "$COUNT" = "3" || return 1 flush_pg_stats COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") - test "$COUNT" = "2" || return 1 + test "$COUNT" = "4" || return 1 } function TEST_rados_get_with_eio() { diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index 3270fb4976df..cea68f34256c 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -659,6 +659,7 @@ OPTION(osd_read_ec_check_for_errors, OPT_BOOL) // return error if any ec shard h // Only use clone_overlap for recovery if there are fewer than // osd_recover_clone_overlap_limit entries in the overlap set OPTION(osd_recover_clone_overlap_limit, OPT_INT) +OPTION(osd_debug_feed_pullee, OPT_INT) OPTION(osd_backfill_scan_min, OPT_INT) OPTION(osd_backfill_scan_max, OPT_INT) diff --git a/src/common/options.cc b/src/common/options.cc index 0d0b51f5cf2e..578a52d0fb21 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -3192,6 +3192,11 @@ std::vector