From: David Zafman
Date: Fri, 22 Sep 2017 00:48:28 +0000 (-0700)
Subject: osd: Allow recovery to send additional reads
X-Git-Tag: v12.2.3~146^2~9
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=62e9c05af9c7b1de44ebe4e055e84a419e0542b8;p=ceph.git

osd: Allow recovery to send additional reads

For now it doesn't include non-acting OSDs.
Added a test for this case.

Signed-off-by: David Zafman
(cherry picked from commit 1235810c2ad08ccb7ef5946686eb2b85798f5bca)
---

diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh
index b7a14097c11..f8b678b561b 100755
--- a/qa/standalone/erasure-code/test-erasure-eio.sh
+++ b/qa/standalone/erasure-code/test-erasure-eio.sh
@@ -380,6 +380,34 @@ function TEST_rados_get_with_subreadall_eio_shard_1() {
     delete_pool $poolname
 }
 
+# Test recovery when the first k copies aren't all available
+function TEST_ec_recovery_errors() {
+    local dir=$1
+    local objname=myobject
+
+    setup_osds 7 || return 1
+
+    local poolname=pool-jerasure
+    create_erasure_coded_pool $poolname 3 2 || return 1
+
+    rados_put $dir $poolname $objname || return 1
+    inject_eio ec data $poolname $objname $dir 0 || return 1
+
+    local -a initial_osds=($(get_osds $poolname $objname))
+    local last_osd=${initial_osds[-1]}
+    # Kill OSD
+    kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1
+    ceph osd down ${last_osd} || return 1
+    ceph osd out ${last_osd} || return 1
+
+    # Cluster should recover this object
+    wait_for_clean || return 1
+
+    #rados_get_data_recovery eio $dir $shard_id || return 1
+
+    delete_pool $poolname
+}
+
 main test-erasure-eio "$@"
 
 # Local Variables:

diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index 40be6a31d4d..8e11122d37d 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -1187,8 +1187,7 @@ void ECBackend::handle_sub_read_reply(
   unsigned is_complete = 0;
   // For redundant reads check for completion as each shard comes in,
   // or in a non-recovery read check for completion once all the shards read.
-  // TODO: It would be nice if recovery could send more reads too
-  if (rop.do_redundant_reads || (!rop.for_recovery && rop.in_progress.empty())) {
+  if (rop.do_redundant_reads || rop.in_progress.empty()) {
     for (map<hobject_t, read_result_t>::const_iterator iter =
         rop.complete.begin();
       iter != rop.complete.end();
@@ -1204,6 +1203,7 @@
       set<int> want_to_read, dummy_minimum;
       get_want_to_read_shards(&want_to_read);
       int err;
+      // TODO: Should we include non-acting nodes here when for_recovery is set?
       if ((err = ec_impl->minimum_to_decode(want_to_read, have, &dummy_minimum)) < 0) {
         dout(20) << __func__ << " minimum_to_decode failed" << dendl;
         if (rop.in_progress.empty()) {
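
To make the behavioral change in ECBackend.cc concrete: before this patch, a read
issued for recovery only reached the early completion check once every outstanding
shard read had returned, and redundant reads never helped it; afterwards, recovery
reads are checked for completion the same way as any other read. The following
minimal standalone sketch models just that guard condition. It is not Ceph code:
ReadOpSketch, k_of_n_decodable, and the may_try_decode_* functions are invented
names, and k_of_n_decodable is only a stand-in for the k-of-n availability test
that minimum_to_decode() performs.

// Minimal standalone sketch (not Ceph code): models the guard in
// ECBackend::handle_sub_read_reply() before and after this patch.
#include <cstddef>
#include <iostream>
#include <set>

struct ReadOpSketch {
  bool do_redundant_reads;    // reads were sent to extra shards up front
  bool for_recovery;          // read op issued on behalf of recovery
  std::set<int> in_progress;  // shards we are still waiting on
  std::set<int> have;         // shards whose replies have arrived
};

// Stand-in for the minimum_to_decode() availability test: with the
// k=3, m=2 profile used by the new test, any 3 distinct shards suffice.
bool k_of_n_decodable(const std::set<int>& have, std::size_t k = 3) {
  return have.size() >= k;
}

// Old guard: recovery reads could never satisfy it.
bool may_try_decode_old(const ReadOpSketch& rop) {
  return rop.do_redundant_reads ||
         (!rop.for_recovery && rop.in_progress.empty());
}

// New guard: recovery reads are checked like any other read.
bool may_try_decode_new(const ReadOpSketch& rop) {
  return rop.do_redundant_reads || rop.in_progress.empty();
}

int main() {
  // A recovery read whose outstanding requests have all returned,
  // with k shards (0, 1, 2) available for decode.
  ReadOpSketch rop{false, true, {}, {0, 1, 2}};
  std::cout << "old guard: " << may_try_decode_old(rop) << '\n';  // prints 0
  std::cout << "new guard: " << may_try_decode_new(rop) << '\n';  // prints 1
  if (may_try_decode_new(rop) && k_of_n_decodable(rop.have))
    std::cout << "decode can proceed\n";
}

Under the old guard the recovery read above never takes the completion path; under
the new one it does, which is what lets TEST_ec_recovery_errors expect the cluster
to recover the object after one of its OSDs is killed and marked out.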