From 3eb1679b1fadb6adb6ff4ed3f1f6069a85f4bbcd Mon Sep 17 00:00:00 2001 From: cuixf Date: Wed, 23 May 2018 20:01:25 -0400 Subject: [PATCH] osd: retry to read object attrs at EC recovery In an EC recovery read, if reading the object's attrs fails or returns errors, we erase the attrs we have read and try to read them again from the remaining shards. This lets the primary OSD get the object's attrs correctly and avoids an assert. Signed-off-by: xiaofei cui --- .../erasure-code/test-erasure-eio.sh | 31 +++++++++++++++++++ src/osd/ECBackend.cc | 12 ++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh index 309df13340cf7..a37258b2fd863 100755 --- a/qa/standalone/erasure-code/test-erasure-eio.sh +++ b/qa/standalone/erasure-code/test-erasure-eio.sh @@ -353,6 +353,37 @@ function TEST_rados_get_with_subreadall_eio_shard_1() { delete_erasure_coded_pool $poolname } +# Test recovery from an object attr read error +function TEST_ec_object_attr_read_error() { + local dir=$1 + local objname=myobject + + setup_osds 7 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + local primary_osd=$(get_primary $poolname $objname) + # Kill primary OSD + kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1 + + # Write data + rados_put $dir $poolname $objname || return 1 + + # Inject eio; shard 1 is the one the attrs are read from + inject_eio ec mdata $poolname $objname $dir 1 || return 1 + + # Restart OSD + run_osd $dir ${primary_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + # Test recovery the first k copies aren't all available function TEST_ec_single_recovery_error() { local dir=$1 diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 50eb7583579a0..151e35e414168 100644 --- a/src/osd/ECBackend.cc +++ 
b/src/osd/ECBackend.cc @@ -1059,6 +1059,8 @@ error: *i, ghobject_t::NO_GEN, shard), reply->attrs_read[*i]); if (r < 0) { + // If the read failed, we should not return the attrs either. + reply->attrs_read.erase(*i); reply->buffers_read.erase(*i); reply->errors[*i] = r; } @@ -2342,13 +2344,21 @@ int ECBackend::send_all_remaining_reads( GenContext<pair<RecoveryMessages *, read_result_t& > &> *c = rop.to_read.find(hoid)->second.cb; + // (Note cuixf) If we need to read attrs and the read failed, try to read them again. + bool want_attrs = + rop.to_read.find(hoid)->second.want_attrs && + (!rop.complete[hoid].attrs || rop.complete[hoid].attrs->empty()); + if (want_attrs) { + dout(10) << __func__ << " want attrs again" << dendl; + } + rop.to_read.erase(hoid); rop.to_read.insert(make_pair( hoid, read_request_t( offsets, shards, - false, + want_attrs, c))); do_read_op(rop); return 0; -- 2.39.5