From: cuixf Date: Thu, 24 May 2018 00:01:25 +0000 (-0400) Subject: osd: retry to read object attrs at EC recovery X-Git-Tag: v14.0.1~1206^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=3eb1679b1fadb6adb6ff4ed3f1f6069a85f4bbcd;p=ceph.git osd: retry to read object attrs at EC recovery During an EC recovery read, if reading the object's attrs fails or returns errors, we erase the attrs that were already read and retry the read from the remaining shards. This lets the primary OSD obtain the correct object attrs and avoids an assert. Signed-off-by: xiaofei cui --- diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh index 309df13340cf7..a37258b2fd863 100755 --- a/qa/standalone/erasure-code/test-erasure-eio.sh +++ b/qa/standalone/erasure-code/test-erasure-eio.sh @@ -353,6 +353,37 @@ function TEST_rados_get_with_subreadall_eio_shard_1() { delete_erasure_coded_pool $poolname } +# Test recovery the object attr read error +function TEST_ec_object_attr_read_error() { + local dir=$1 + local objname=myobject + + setup_osds 7 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + local primary_osd=$(get_primary $poolname $objname) + # Kill primary OSD + kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1 + + # Write data + rados_put $dir $poolname $objname || return 1 + + # Inject eio, shard 1 is the one read attr + inject_eio ec mdata $poolname $objname $dir 1 || return 1 + + # Restart OSD + run_osd $dir ${primary_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + # Test recovery the first k copies aren't all available function TEST_ec_single_recovery_error() { local dir=$1 diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 50eb7583579a0..151e35e414168 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ 
-1059,6 +1059,8 @@ error: *i, ghobject_t::NO_GEN, shard), reply->attrs_read[*i]); if (r < 0) { + // If we read error, we should not return the attrs too. + reply->attrs_read.erase(*i); reply->buffers_read.erase(*i); reply->errors[*i] = r; } @@ -2342,13 +2344,21 @@ int ECBackend::send_all_remaining_reads( GenContext &> *c = rop.to_read.find(hoid)->second.cb; + // (Note cuixf) If we need to read attrs and we read failed, try to read again. + bool want_attrs = + rop.to_read.find(hoid)->second.want_attrs && + (!rop.complete[hoid].attrs || rop.complete[hoid].attrs->empty()); + if (want_attrs) { + dout(10) << __func__ << " want attrs again" << dendl; + } + rop.to_read.erase(hoid); rop.to_read.insert(make_pair( hoid, read_request_t( offsets, shards, - false, + want_attrs, c))); do_read_op(rop); return 0;